***generate regression results***

*specifying panel data: destring converts city_id to numeric (if it is stored as a string) so that it can be used as the panel identifier in xtset*
destring city_id, replace
xtset city_id year

*install outreg2 from SSC if it is not already available (it is needed to create the regression tables); this runs unattended, whereas findit only opens a search window that has to be clicked through by hand*
*the check is done before the estimation so that nothing runs between a regression and the outreg2 call that exports it*
capture which outreg2
if _rc ssc install outreg2

*Table 4.1 column 1: baseline regression without dirty sector (city fixed effects via fe, year dummies via i.year, robust standard errors)*
xtreg lnso2_emi ind_val_hmt_per ind_val_fie_per lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real i.year, fe robust

*Create Table 4.1 column 1 in word document table4.1.doc (note: table4.1.doc can be viewed, but it needs to be closed before column 2 can be appended to the table)*
*the year dummies are dropped from the table and reported through the "Year FE" row instead*
outreg2 using "table4.1.doc", se title("Table 4.1 Baseline regressions") ctitle("","Baseline without dirty sector") replace drop(i.year) addtext(Year FE, YES, City FE, YES) label

*Table 4.1 column 2: baseline regression (adds emp_dirty_per to the column 1 specification)*
xtreg lnso2_emi ind_val_hmt_per ind_val_fie_per lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year, fe robust

*Append column 2 to Table 4.1 (note: table4.1.doc needs to be closed before running this line)*
outreg2 using "table4.1.doc", se ctitle("","Baseline") append drop(i.year) addtext(Year FE, YES, City FE, YES) label

*Table 4.2 column 1: WLS regression with interactions between firm share and administrative level only*

*drop the helper variables left over from an earlier run, so that this section can be re-run without "already defined" errors*
foreach v in uhat1 uhat1_sq vhat1 vhat1_inv {
    capture drop `v'
}

*step 1: unweighted OLS with year and city dummies, followed by a Breusch-Pagan test against level1-level4 (presumably the administrative-level indicators created elsewhere - confirm)*
reg lnso2_emi c.ind_val_hmt_per#i.level c.ind_val_fie_per#i.level lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id
hettest level1 level2 level3 level4 

*step 2: regress the squared OLS residuals on level1-level4; the fitted values (vhat1) are used as the estimated error variance*
predict uhat1, residuals
gen uhat1_sq=uhat1^2
reg uhat1_sq level1 level2 level3 level4 
predict vhat1, xb

*a non-positive fitted variance cannot be turned into a valid weight (zero gives a missing weight, negative gives a negative weight), so report it if it happens*
quietly count if vhat1 <= 0
if r(N) > 0 display as error "warning: " r(N) " observations have a non-positive fitted variance (vhat1); their WLS weights are invalid"

*step 3: re-estimate the model, weighting each observation by the inverse of its estimated variance*
gen vhat1_inv=1/vhat1
reg lnso2_emi c.ind_val_hmt_per#i.level c.ind_val_fie_per#i.level lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id [aweight=vhat1_inv]

*Create Table 4.2 column 1 in word document table4.2.doc (note: "#" prevents the labels of the interaction terms from being shown in the table, so the labels of the interaction terms were edited manually in the regression table; the same goes for column 2)*
outreg2 using "table4.2.doc", se title("Table 4.2 WLS regressions across city administrative levels and regions") ctitle("","Across levels") replace keep(1b.level#c.ind_val_hmt_per 2.level#c.ind_val_hmt_per 3.level#c.ind_val_hmt_per 4.level#c.ind_val_hmt_per 1b.level#c.ind_val_fie_per 2.level#c.ind_val_fie_per 3.level#c.ind_val_fie_per 4.level#c.ind_val_fie_per lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per) addtext(Year FE, YES, City FE, YES) label

*Table 4.2 column 2: WLS regression with interactions between firm share and region only*

*drop the helper variables left over from an earlier run, so that this section can be re-run without "already defined" errors*
foreach v in uhat2 uhat2_sq vhat2 vhat2_inv {
    capture drop `v'
}

*step 1: unweighted OLS with year and city dummies, followed by a Breusch-Pagan test against region1-region7 (presumably the region indicators created elsewhere - confirm)*
reg lnso2_emi lnind_val_real c.ind_val_hmt_per#i.region c.ind_val_fie_per#i.region lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id
hettest region1 region2 region3 region4 region5 region6 region7

*step 2: regress the squared OLS residuals on region1-region7; the fitted values (vhat2) are used as the estimated error variance*
predict uhat2, residuals
gen uhat2_sq=uhat2^2
reg uhat2_sq region1 region2 region3 region4 region5 region6 region7
predict vhat2, xb

*a non-positive fitted variance cannot be turned into a valid weight (zero gives a missing weight, negative gives a negative weight), so report it if it happens*
quietly count if vhat2 <= 0
if r(N) > 0 display as error "warning: " r(N) " observations have a non-positive fitted variance (vhat2); their WLS weights are invalid"

*step 3: re-estimate the model, weighting each observation by the inverse of its estimated variance*
gen vhat2_inv=1/vhat2
reg lnso2_emi c.ind_val_hmt_per#i.region c.ind_val_fie_per#i.region lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id [aweight=vhat2_inv]

*Append column 2 to Table 4.2 (note: table4.2.doc needs to be closed before running this line)*
outreg2 using "table4.2.doc", se ctitle("","Across regions") append keep(0b.region#c.ind_val_hmt_per 1.region#c.ind_val_hmt_per 2.region#c.ind_val_hmt_per 3.region#c.ind_val_hmt_per 4.region#c.ind_val_hmt_per 5.region#c.ind_val_hmt_per 6.region#c.ind_val_hmt_per 0b.region#c.ind_val_fie_per 1.region#c.ind_val_fie_per 2.region#c.ind_val_fie_per 3.region#c.ind_val_fie_per 4.region#c.ind_val_fie_per 5.region#c.ind_val_fie_per 6.region#c.ind_val_fie_per lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per) addtext(Year FE, YES, City FE, YES) label

*Table 4.2 column 3: WLS regression with administrative level interactions and region interactions combined*

*drop the helper variables left over from an earlier run, so that this section can be re-run without "already defined" errors*
foreach v in uhat3 uhat3_sq vhat3 vhat3_inv {
    capture drop `v'
}

*step 1: unweighted OLS with year and city dummies, followed by a Breusch-Pagan test against level1-level4 and region1-region7 (presumably indicator variables created elsewhere - confirm)*
reg lnso2_emi lnind_val_real c.ind_val_hmt_per#i.level c.ind_val_fie_per#i.level c.ind_val_hmt_per#i.region c.ind_val_fie_per#i.region lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id
hettest level1 level2 level3 level4 region1 region2 region3 region4 region5 region6 region7

*step 2: regress the squared OLS residuals on the level and region variables; the fitted values (vhat3) are used as the estimated error variance*
predict uhat3, residuals
gen uhat3_sq=uhat3^2
reg uhat3_sq level1 level2 level3 level4 region1 region2 region3 region4 region5 region6 region7
predict vhat3, xb

*the variance regression is linear and additive in two sets of variables, so nothing forces its fitted values to be positive; a non-positive value cannot be turned into a valid weight (zero gives a missing weight, negative gives a negative weight), so report it if it happens*
quietly count if vhat3 <= 0
if r(N) > 0 display as error "warning: " r(N) " observations have a non-positive fitted variance (vhat3); their WLS weights are invalid"

*step 3: re-estimate the model, weighting each observation by the inverse of its estimated variance*
gen vhat3_inv=1/vhat3
reg lnso2_emi c.ind_val_hmt_per#i.level c.ind_val_fie_per#i.level c.ind_val_hmt_per#i.region c.ind_val_fie_per#i.region lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per i.year i.city_id [aweight=vhat3_inv]

*Append column 3 to Table 4.2 (note: table4.2.doc needs to be closed before running this line)*
outreg2 using "table4.2.doc", se ctitle("","Across levels and regions") append keep(1b.level#c.ind_val_hmt_per 2.level#c.ind_val_hmt_per 3.level#c.ind_val_hmt_per 4.level#c.ind_val_hmt_per 1b.level#c.ind_val_fie_per 2.level#c.ind_val_fie_per 3.level#c.ind_val_fie_per 4.level#c.ind_val_fie_per 0b.region#c.ind_val_hmt_per 1.region#c.ind_val_hmt_per 2.region#c.ind_val_hmt_per 3.region#c.ind_val_hmt_per 4.region#c.ind_val_hmt_per 5.region#c.ind_val_hmt_per 6.region#c.ind_val_hmt_per 0b.region#c.ind_val_fie_per 1.region#c.ind_val_fie_per 2.region#c.ind_val_fie_per 3.region#c.ind_val_fie_per 4.region#c.ind_val_fie_per 5.region#c.ind_val_fie_per 6.region#c.ind_val_fie_per lnind_val_real lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per) addtext(Year FE, YES, City FE, YES) label

***Summary statistics***

*install fsum from SSC if it is not already available (it is needed to create the summary statistics table); this runs unattended, whereas findit only opens a search window that has to be clicked through by hand*
capture which fsum
if _rc ssc install fsum

*Create Table 3.2 summary statistics: minimum, standard deviation, mean and maximum of each variable, with variable labels shown in place of variable names*
fsum so2_emi ind_val_hmt_per ind_val_fie_per ind_val_real gdp_pc_real kl_ratio_real emp_dirty_per level region, stat(min sd mean max) uselabel

***Data appendix***

*summary statistics: detailed summary (percentiles, skewness, kurtosis, etc.) for each variable in turn, one summarize call per variable*
foreach v in so2_emi ind_val_hmt_per ind_val_fie_per ind_val_real gdp_pc_real lngdp_pc_real_sq kl_ratio_real emp_dirty_per {
    summarize `v', detail
}

*histograms: one per variable, in levels and in logs*
*each graph is stored in memory under its own name (hist_<variable>), so that running the whole section does not leave only the last histogram available; replace allows the section to be re-run*
foreach v in so2_emi lnso2_emi ind_val_hmt_per ind_val_fie_per ind_val_real lnind_val_real gdp_pc_real lngdp_pc_real lngdp_pc_real_sq kl_ratio_real lnkl_ratio_real emp_dirty_per {
    histogram `v', name(hist_`v', replace)
}

*Breusch-Pagan test for heteroskedasticity for regression a): OLS with year and city dummies, tested against the fitted values (the default when no variables are listed)*
regress lnso2_emi ind_val_hmt_per ind_val_fie_per lnind_val_real ///
    lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per ///
    i.year i.city_id
estat hettest

*Breusch-Pagan test for heteroskedasticity for regression b): OLS with level and region interactions, tested against the listed level and region variables*
regress lnso2_emi lnind_val_real ///
    c.ind_val_hmt_per#i.level c.ind_val_fie_per#i.level ///
    c.ind_val_hmt_per#i.region c.ind_val_fie_per#i.region ///
    lngdp_pc_real lngdp_pc_real_sq lnkl_ratio_real emp_dirty_per ///
    i.year i.city_id
estat hettest level1 level2 level3 level4 region1 region2 region3 region4 region5 region6 region7
